In [1]:
import math
import pandas as pd
import BQhelper as bq
import matplotlib.pyplot as plt
bq.project = "mlab-sandbox"
# bq.dataset = 'mattmathis'
# bq.UnitTestRunQuery()
# bq.UnitTestWriteQuery()
In [2]:
query="""
SELECT
a.TestTime,
client.IP,
a.MeanThroughputMbps,
node._instruments
# FROM `mlab-sandbox.mm_unified_testing.unified_downloads`
FROM `measurement-lab.ndt.unified_downloads`
WHERE client.IP in ( {clientIP} )
AND test_date > '2019-03-01'
ORDER BY TestTime
"""
In [3]:
%matplotlib nbagg
# from matplotlib import interactive
# interactive(True)
plt.ion()
clients = [
'69.68.23.44', # Max deltaMean
'96.229.66.58' # Max deltaMax
]
def plotMultiBeacons(clients, columns=1, width=10, data=None, selector='downloads'):
if data is None:
clist = '"'+'", "'.join(clients)+'"'
data=bq.QueryTimestampTimeseries(query, clientIP=clist, selector=selector)
global StashData # Skip slow queries when debugging
StashData = data
rows = math.ceil(len(clients) / float(columns))
figLen = width/float(columns)*rows # assume square subplots
print('Size', figLen, width)
plt.rcParams['figure.figsize'] = [ width, figLen]
fig, axs = plt.subplots(nrows=rows, ncols=columns, squeeze=False, sharex='all')
for ax, client in zip([i for j in axs for i in j], clients):
print ('Beacon: '+client)
ax.set_title('Beacon: '+client)
cdata = data[data['IP'] == client]
ax.plot(cdata['MeanThroughputMbps'][cdata["_instruments"] == 'web100'], 'b.',
cdata['MeanThroughputMbps'][cdata["_instruments"] == 'tcpinfo'], 'r.')
fig.autofmt_xdate()
fig.show()
# plotMultiBeacons(clients, 2, width=10)
In [4]:
# NB: ThousandRandomBeacons4perDay are all domotz tests, which are unspeakably ugly.
listq="""
With
Beacons AS (
SELECT
clientIP,
node._Instruments AS vers,
count(*) AS tests,
MIN(a.MeanThroughputMbps) AS minTput,
SUM(a.MeanThroughputMbps) /Count(*) AS meanTput,
STDDEV(a.MeanThroughputMbps) /Count(*) AS stddevTput,
MAX(a.MeanThroughputMbps) AS maxTput,
MIN(a.MinRTT ) AS minMinRTT,
SUM(a.MinRTT ) /Count(*) AS meanMinRTT,
MAX(a.MinRTT ) AS maxMinRTT
FROM
`measurement-lab.ndt.unified_downloads` -- Remember to test both uploads and downloads
JOIN `mlab-sandbox.mattmathis.ThousandRandomBeacons4perDay` ON Client.IP = clientIP
WHERE
test_date BETWEEN '2019-03-01' AND '2020-03-01'
AND client.Geo.country_code = "US"
AND (client.Geo.region = "MI" OR client.Geo.region = "VA")
GROUP BY
node._Instruments, clientIP
),
Paired AS (
SELECT
NDT5.meanTput - NDTlegacy.meanTput AS delta,
NDT5.maxTput - NDTlegacy.maxTput AS deltaMax,
ABS(NDT5.meanTput - NDTlegacy.meanTput ) / SQRT(NDT5.stddevTput*NDTlegacy.stddevTput) AS spread,
NDTlegacy.stddevTput / NDT5.stddevTput AS stablization,
NDT5,
NDTlegacy,
FROM ( SELECT * FROM Beacons WHERE vers = 'tcpinfo' ) AS NDT5
JOIN ( SELECT * FROM Beacons WHERE vers = 'web100' ) AS NDTlegacy USING ( clientIP )
)
# Choose
SELECT * FROM Paired ORDER BY {selector} LIMIT 30
"""
In [5]:
selector="deltaMax desc"
selector="delta desc"
selector="stablization desc"
stepbeacons=bq.DataFrameQuery(listq, selector=selector)
In [ ]:
print (stepbeacons)
In [9]:
# lame
beacons = []
for t in stepbeacons['NDT5']:
beacons.append(t['clientIP'])
plotMultiBeacons(beacons, 2, width=10)
In [7]:
# Particularly ugly,
# plotMultiBeacons(['67.161.83.114'], 1, width=10)
In [8]:
SELECT
COUNT (*) AS tests,
client.IP,
MIN( test_date ) AS firstFast
FROM
`measurement-lab.ndt.unified_downloads`
WHERE
client.Geo.country_code = "US"
AND client.Geo.region = "VA"
AND a.MeanThroughputMbps > 100.0
AND test_date > '2018-01-01'
GROUP BY client.IP
HAVING tests > 100
ORDER BY firstFast
LIMIT
2000